*******************************************************************************
**
** Clean up the AHA survey data 
** This file also outputs an AHA ID - Medicare PN crosswalk (only when MAKEXW is set to 1) to be used later 
**
*******************************************************************************




* Start from a clean session: close any log left open, wipe memory,
* and turn off the more pauses so the script runs unattended
capture log close
clear all
set more off


** Directory layout
**   fpath_raw_aha    : raw AHA extracts, one .dta file per year
**   fpath_input_aha  : CSV listing the variable names we want to read
**   fpath_output_aha : log file and cleaned output datasets
local fpath_raw_aha "/homes/nber/shruthi-dua51934/ahaex.work/sacarny_sadun_aha_20190711"
local fpath_input_aha "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/aha/input"
local fpath_output_aha "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/aha/output"

log using "`fpath_output_aha'/load_aha_data.log", replace

** Run switches (1 = run that section) and the range of survey years to load
global MAKEDATA 1
global MAKEXW 0
global STARTYEAR 1999
global ENDYEAR 2017


* Build the list of variable names to read from each yearly extract.
* The names come from the name column of the input CSV; a handful of
* names are excluded, and the remaining distinct values are stored in
* the local macro ahavars.
insheet using "`fpath_input_aha'/aha_reduced_varnames.csv", clear
keep name
keep if !inlist(name, "mapp4", "mapp14", "mapp15", "hcfaid", "mapp17", "mapp6", "mapp9")
levelsof name, local(ahavars) clean



* Concatenate the yearly extracts into a single id-year dataset.
* Storage types are harmonised within each year before appending so that
* every variable has the same type across years.
if $MAKEDATA == 1 {
	* Seed the accumulator with a truly empty dataset. Without this clear,
	* the variable-name list still in memory would be saved as the seed and
	* then appended into the panel, leaving junk rows and a stray name column.
	clear
	tempfile building
	save `building', emptyok
	forvalues y = $STARTYEAR(1)$ENDYEAR {
		di "`y'"
		use `ahavars' using "`fpath_raw_aha'/aha_extract`y'.dta", clear

		* convert these to numeric in every year
		destring hrrcode cntrl serv radmchi chc los mapp* , replace

		* ehlth is not reported every year
		* when it is present, store it as a string
		capture confirm variable ehlth
		if !_rc {
			tostring ehlth, replace
		}

		* tag the year, then stack this extract onto the years loaded so far
		gen year = `y'
		append using `building'
		save `"`building'"', replace
	}
	* drop rows with a missing AHA id, then verify that id and year
	* uniquely identify the remaining observations
	drop if missing(id)
	isid id year
	save "`fpath_output_aha'/complete_aha.dta", replace
}

* make the crosswalks: (id-pn) and (id-zipcode)
* Both files are written to the same output directory as complete_aha.dta.
* The save commands previously referenced a local macro that is never
* defined in this script; it expanded to an empty string, so the files
* were aimed at the filesystem root instead of the output directory.
if $MAKEXW == 1 {
	* read the concatenated aha data
	use "`fpath_output_aha'/complete_aha.dta", clear

	* id-pn crosswalk: distinct pairs of AHA id and Medicare provider number
	preserve
	keep id mcrnum
	duplicates drop

	* drop any missing obs (mcrnum is compared as a string here)
	drop if mcrnum == "." | mcrnum == ""
	rename mcrnum pn

	* save the id-pn xw
	save "`fpath_output_aha'/aha_pn_xwalk_all.dta", replace
	restore

	* generate a id to zip xw: distinct id, zip, mcrnum, year combinations
	keep id mloczip mcrnum year
	duplicates drop
	save "`fpath_output_aha'/aha_zip.dta", replace
}
 


* close the log file opened by the log using command earlier in this script
log close 













